
// PRELIMINARIES
// Set the project root for each user before running this file, for example:
//   global dir "C:/path/to/project"
clear all
set more off 

// Every path in this file is built from the global macro dir. Stop here with a clear
// message when it is unset instead of failing later with a misleading file-not-found error.
if "$dir" == "" {
display as error "global macro dir is not set; define it (global dir ...) before running this do-file"
exit 198
}

** prep ACS 2012 & 2016 data
{
// Build adult-level ACS indicators using the same variable names as the ALP demographic
// variables created later in this file, then save them for comparison.
use "$dir/data/raw-data/acs_data.dta"
drop serial hhwt gq pernum
drop if age<=17 // Restrict attention to adults
// Guard on missing age: Stata orders missing above every number, so an unguarded
// (age>=65) would be 1 for a respondent whose age is unknown.
gen d_age18to44 = (age<=44) if !missing(age)
gen d_age65plus = (age>=65) if !missing(age)

rename age age_1 

gen female = sex==2

// Race and ethnicity indicators (numeric codes are presumably the IPUMS race/hispan codes -- TODO confirm).
// nonhispwhite also requires hispan==0 so that it matches both its name and the ALP
// definition used later in this file (caucasian==1 & latino==0).
gen nonhispwhite = (race==1 & hispan==0)
gen africanamer = (race==2)
gen amerindian = (race==3)
gen asian = (race==4 | race==5 | race==6)
gen latino = (hispan!=0)

gen highschool = (educ>=6)
gen college = (educ>=10)

gen employed = (empstat==1)
gen unemployed = (empstat==2)

gen outoflaborforce = (empstat==3)

gen thenortheast = (region==11 | region==12)
label var thenortheast "Lives in northeast (Census region I)"
gen themidwest = (region==21 | region==22)
label var themidwest "Lives in midwest (Census region II)"
gen thesouth = (region==31 | region==32 | region==33)
label var thesouth "Lives in south (Census region III)"
gen thewest = (region==41 | region==42)
label var thewest "Lives in west (Census region IV)"

*** create income categories consistent with ALP 
label define incomelabels 1 "Less than 10,000" 2 "10,000 to 19,999 " 3 "20,000 to 29,999" 4 "30,000 to 39,999" ///
 5 "40,000 to 49,999" 6 "50,000 to 59,999" 7 "60,000 to 74,999" 8 "75,000 to 99,999" 9 "100,000 to 124,999" ///
 10 "125,000 to 199,999" 11 "200,000 and over"

// Negative values and 9999999 are treated as missing household income.
replace hhincome = . if hhincome<0 | hhincome == 9999999
// Without icodes, cut() stores the left end of each interval (0, 10000, ...), so the labels
// defined for 1-11 would never attach. icodes returns 0-10; shift by one to get 1-11,
// the same coding as the ALP incomecat variable.
egen incomecat = cut(hhincome), at(0,10000,20000,30000,40000,50000,60000,75000,100000,125000,200000,10000000) icodes
replace incomecat = incomecat + 1
label values incomecat incomelabels

gen approxincome = hhincome/1000
compress
save "$dir/data/outputs/acs2012_2016adults.dta", replace
}
/********************** PREP RAW DECISION DATA **********************/
{
// For each wave: flag who completed the experiment, drop off-budget (glitch) decisions,
// summarise each subject's allocations, append invited subjects who never logged in, and save
// (a) decision-level data for CES/CCEI estimation and (b) one row per subject.
foreach data in "ALP1" "ALP2"{
// Wave suffix ("1" or "2") appended to the subject-level variable names saved at the end.
local wave = substr("`data'", 4, 1)
// Previously this global was filled from an undefined local and was therefore always empty.
global data "`data'"
use "$dir/data/raw-data/`data'", clear
di "`data'"
// The project root lives in the global macro dir, not in a local macro.
di "$dir"

// IDENTIFY SUBJECTS WHO COMPLETED THE EXPERIMENT

// practice distinguishes incentivized rounds (0), practice rounds (1) and comprehension questions (2)
label var practice "Practice question (not incentivized)"
label define practicelabel 0 "Incentivized" 1 "Practice round" 2 "Comprehension question"
label values practice practicelabel

// Create indicator for completing the comprehension check (practice==2)
// NOTE(review): count(self!=.) counts every practice==2 row, including rows where self is
// missing, because the expression self!=. is never missing. The counts below use count(self).
// Left unchanged; confirm this is intended.
bys prim_key:  egen tempvar = count(self!=.) if practice==2 
bys prim_key:  egen compcheck = max(tempvar)
replace compcheck = 0 if compcheck==.
label var compcheck "Completed comprhension check question"
drop tempvar

// Count number of practice rounds completed by each subject (practice==1)
bys prim_key: egen tempvar = count(self) if practice==1
bys prim_key:  egen pracrounds = max(tempvar)
label var pracrounds "Number of practice rounds completed"
drop tempvar

// Create indicator for completing Round 50 (even if there is a computer glitch)
// Computed before the glitch rows are dropped below, so a glitched round 50 still counts.
bys prim_key:  egen tempvar = count(self) if round==50 
bys prim_key:  egen lastround = max(tempvar)
replace lastround = 0 if lastround==.
label var lastround "Completed Round 50"
drop tempvar

// Drop rounds where subject did not choose on/near budget line (computer glitches)
gen price = self_max/other_max
label var price "Price of tokens to other"
gen lnprice = ln(price)
label var lnprice "Log price of giving"
gen amtspent = self + other*price
gen budgetdiff = abs(self_max - amtspent)

// Drop incentivized decisions where subject is more than one token away from budget line
// 65 decisions in ALP1 and 26 decisions in ALP2 (as of 1/10/17)
drop if (self==. | other==. | budgetdiff>1) & practice==0
drop budgetdiff

// Count number of incentivized rounds completed by each subject (practice==0)
bys prim_key:  egen tempvar = count(self) if practice==0
bys prim_key:  egen rounds = max(tempvar)
replace rounds = 0 if rounds==.
label var rounds "Number of decision problems completed"
drop tempvar


// Create indicator for completing the experiment 
// Define complete as:  either doing ALL 50 decision problems or doing more than 40 including the last round
egen idtag = tag(prim_key)
gen complete = (rounds==50) 
label var complete "Subject completed experiment"
replace complete = 1 if rounds>40 & lastround==1
// Generate summary measures of allocation decisions
gen owntokens =  self/(self+other) if practice==0
label var owntokens "Tokens to self"
egen avowntokens=mean(owntokens), by(prim_key)
label var avowntokens "Average value of tokens to self"
gen ownshare =  self/self_max if practice==0
label var ownshare "Budget share spent on self"
egen avownshare=mean(ownshare), by(prim_key)
label var avownshare "Average share of budget spent on self"

// SELECTION:  WHO LOGS INTO THE EXPERIMENT?
// Rows that exist only in the all-subjects file (_merge==2) are subjects with no decision data.
sort prim_key
merge m:1 prim_key using "$dir/data/raw-data/`data'allsubjects"
gen neverstart = (_merge==2)
label var neverstart "Subject never logged in to experiment"
// Those subjects contribute a single row each, so tag it as their subject-level row.
replace idtag = 1 if neverstart==1
replace complete = 0 if complete==. & neverstart==1
drop _merge


// Create data set for CES/CCEI estimation 
sort prim_key practice round
egen subjectid = group(prim_key)
preserve
 sort subjectid round
 keep prim_key subjectid round self_max other_max self other 
 save "$dir/data/outputs/`data'forestimation_primkeys", replace
 restore 
// Create data set with one obs per subject for merging with parameters, analysis
keep if idtag==1
keep subjectid prim_key complete neverstart rounds pracrounds avowntokens avownshare
// Suffix every subject-level variable except prim_key with the wave number.
foreach var of varlist subjectid complete neverstart rounds pracrounds /// 
 avowntokens avownshare {
 rename `var' `var'`wave'
}
sort prim_key
save "$dir/data/outputs/`data'withoutparams", replace
 
}
}

****** Prep  CCEI & CES data ********
{
clear all

// GARP workbooks, one per wave: standardise the id and round-count names, then
// suffix every variable with the wave number and save as a Stata file.
foreach wave in 1 2 {
import excel "$dir/data/raw-data/GARP`wave'", firstrow clear
rename ID subjectid
rename ofobs garprounds
sort subjectid 
rename * *`wave'
save "$dir/data/outputs/GARP`wave'", replace
}

// CES workbooks, one per wave: same treatment (the id column is lower-case here).
foreach wave in 1 2 {
import excel "$dir/data/raw-data/CES`wave'", firstrow clear
rename id subjectid
sort subjectid
rename * *`wave'
save "$dir/data/outputs/CES`wave'", replace
}

** other stuff

// Attach the wave-2 CES and GARP results to the wave-2 subject-level file.
use "$dir/data/outputs/ALP2withoutparams"
*keep if complete2==1
foreach params in CES2 GARP2 {
merge 1:1 subjectid2 using "$dir/data/outputs/`params'"
drop _m
}
save "$dir/data/outputs/ALP2withparams", replace
}

***clean political data 
{
**** merge all the PEPS rounds in a unique dataset *********

// Waves 1 and 2 start the panel. qKdata flags whether a respondent has data in wave K;
// incomplete flags respondents who are absent from at least one wave.
use "$dir/data/raw-data/PEPS1", clear
merge 1:1 prim_key using "$dir/data/raw-data/PEPS2"
gen incomplete = (_merge==1 | _merge==2)
gen q1data = (_merge==1 | _merge==3)
gen q2data = (_merge==2 | _merge==3)
drop _merge
sort prim_key

// Waves 3-6 repeat the same pattern. Respondents who first appear in wave k (_merge==2)
// have no earlier flags yet, so those are set to 0.
forvalues k = 3/6 {
merge 1:1 prim_key using "$dir/data/raw-data/PEPS`k'"
replace incomplete = 1 if incomplete!=1 & (_merge==1 | _merge==2)
gen q`k'data = (_merge==2 | _merge==3)
local prev = `k' - 1
forvalues j = 1/`prev' {
replace q`j'data = 0 if _merge==2
}
drop _merge
sort prim_key
}

// Six-digit participation pattern, one digit per wave (for example 111111 = present in all six).
gen datapattern = q1data*100000+q2data*10000+q3data*1000+q4data*100+q5data*10+q6data

// Drop only the six helper flags. The former wildcard (q followed by *) would also have
// removed any survey variable whose name happens to start with q.
drop q1data q2data q3data q4data q5data q6data
order _all, alphabetic
order prim_key, first
sort prim_key
save "$dir/data/outputs/PEPS_all.dta", replace



// Derive candidate-support and voting indicators from the merged PEPS panel and keep the
// respondents who are in the ALP2 experiment sample.
// Each indicator is left missing when the underlying survey item is missing.
use "$dir/data/outputs/PEPS_all.dta", clear
** baseline pre-primary survey 
gen willvote_rep_prim = primary_q1 == 2 if primary_q1 ~=. 
gen  willvote_dem_prim = primary_q1 == 1 if primary_q1 ~= .
gen Trump_preprimary1 = repnom_1_q1 == 1 if repnom_1_q1~=. 
gen Clinton_preprimary1 = demnom_1_q1 == 1 if demnom_1_q1~=. 
gen Sanders_preprimary1 = demnom_1_q1 == 2 if demnom_1_q1~=.
** pre-primary (wave2) survey only to people who said were going to vote in the primary in baseline survey 
gen Trump_preprimary2 = repnom_1_q2 == 1 if repnom_1_q2 ~= .
gen Clinton_preprimary2 = demnom_1_q2 == 1 if demnom_1_q2 ~=.
gen Sanders_preprimary2 = demnom_1_q2 == 2 if demnom_1_q2 ~=.

** after-primary wave, asking who they wanted as nominee and chance they would have voted for dem or rep 
gen Trump_postprimary = repnom3_q3 == 1 if repnom3_q3~=. 
gen Clinton_postprimary = demnom3_q3 == 1 if demnom3_q3~=. 
gen Sanders_postprimary = demnom3_q3 == 2 if demnom3_q3~=.
rename whovote_republican_q3 Rep_postprimary 
rename whovote_democrat_q3 Dem_postprimary 


** support for Trump and Clinton in post-conventions (% chance of voting for them), pre-election waves 
rename whovote_trump_q4 Trump_preelection1
rename whovote_trump_q5 Trump_preelection2
rename whovote_clinton_q4 Clinton_preelection1
rename whovote_clinton_q5 Clinton_preelection2

** post-election wave 
gen votedelection = vote_q6 == 1 if vote_q6 ~= .
gen votedTrump = votedfor_q6 == 2 if votedfor_q6 ~= .
gen votedClinton = votedfor_q6 == 1 if votedfor_q6 ~= .


// The path previously began with a stray slash before the project root, unlike every other
// path in this file; that breaks whenever the root is a drive-letter or relative path.
merge 1:1 prim_key using "$dir/data/raw-data/ALP2_primkeys.dta"
keep if complete2 !=.
keep if _m == 3
drop _m 
save "$dir/data/outputs/ALP2_politicaldata.dta", replace
}


/********************** CLEAN ALP1 & ALP2 Demographics DATA **********************/
{
clear

* load demographic data for ALP2

use "$dir/data/raw-data/ALP_MS90002_2017_01_24_09_27_16.dta"

drop *householdmember_age* *householdmember_gender*  *_hoer_* ms90002_eter *_hoon_* *householdmember_relation*

// Strip the module prefix from the variables used below.
foreach stub in language tsend gender birthyear ///
 calcage statereside borninus stateborn citizenus currentlivingsituation ethnicity ///
 highesteducation hispaniclatino mexicans1 mexicans2 mexicans3 mexicans4 hispaniclatino_detail ///
 currentjobstatuss1 currentjobstatuss2 currentjobstatuss3 currentjobstatuss4 currentjobstatuss5 currentjobstatuss6 ///
 currentjobstatuss7 doyouwork typework householdmembers familyincome familyincome_part2 recruitment_type ///
 recruit_type first_contact contact_type {
 rename ms90002_`stub' `stub'
}

rename ms90002_prim_key prim_key
order _all, alphabetic
order prim_key, first
drop if prim_key==""
sort prim_key

* keep only people invited to our experiment 
// NOTE(review): rows found only in the demographics file (_merge==1) are not dropped here,
// unlike in the ALP1 section further down; they are removed only when this file is merged
// into the final data set. Confirm that is intended.
merge m:1 prim_key using "$dir/data/raw-data/ALP2_primkeys.dta", keepusing(complete2)

rename complete2 complete
gen missing_demographics = 1 if _merge==2
// 56 people don't have demographics data, but they also didn't complete the experiment 
// (The note above used to start with three slashes, which is a line-continuation marker
// and silently joined it to the drop command below.)
drop _merge

* gender
gen female = (gender==2) if gender!=.
label var female "Female"

* age 
gen age = calcage 
* Stata orders missing above every number, so an unguarded (age>=65) is 1 when age is
* unknown. Leave both age-group indicators missing in that case.
gen d_age18to44 = (age<=44) if !missing(age)
gen d_age65plus = (age>=65) if !missing(age)


* education
* Collapse the raw attainment codes into five levels.
* Raw codes 1 and 2 were previously not listed, so recode left them unchanged and a raw 2
* ended up labelled Completed HS. They are now folded into level 1.
* NOTE(review): this assumes raw codes 1-2 are attainment levels below code 3. Confirm against the codebook.
recode highesteducation (1/8=1) (9=2) (10/12=3) (13=4) (14/16=5), gen(education)
label define edulabels 1 "Less than HS" 2 "Completed HS" 3 "Some college" 4 "Undergraduate" 5 "Graduate"
label values education edulabels
label var education "Education"
* Indicators are left missing when education is missing.
gen lessthanHS = (education==1) if education!=.
label var lessthanHS "Did not complete high school"
gen highschool = (education>=2) if education!=.
label var highschool "Completed high school"
gen somecollege = (education==3) if education!=.
label var somecollege "Some college"
gen college = (education>=4) if education!=.
label var college "Completed college"

* HH income
* approxincome is a dollar value assigned to each familyincome bracket (1-13), in code order.
gen approxincome = .
local k = 0
foreach midpoint in 2500 6250 8750 11250 13750 17500 22500 27500 32500 37500 45000 55000 67500 {
local ++k
replace approxincome = `midpoint' if familyincome==`k'
}
* Bracket 14 is split further by the follow-up question familyincome_part2.
replace approxincome = 87500 if familyincome==14 & familyincome_part2==1
replace approxincome = 112500 if familyincome==14 & familyincome_part2==2
replace approxincome = 162500 if familyincome==14 & familyincome_part2==3
replace approxincome = 206250 if familyincome==14 & familyincome_part2==4
replace approxincome = approxincome/1000
label var approxincome "Approximate HH (in thousands)"
recode familyincome (1/3 = 1) (4/6=2) (7/8=3) (9/10=4) (11=5) (12=6) (13=7) (14=8), gen(incomecat)
replace incomecat = 9 if incomecat==8 & familyincome_part2==2
replace incomecat = 10 if incomecat==8 & familyincome_part2==3
replace incomecat = 11 if incomecat==8 & familyincome_part2==4
* When the follow-up answer is not one of 1-4, approxincome is missing. Previously incomecat
* still defaulted to category 8 for those respondents; leave it missing as well so the two
* income measures agree.
replace incomecat = . if familyincome==14 & !inlist(familyincome_part2, 1, 2, 3, 4)
label define incomelabels 1 "Less than 10,000" 2 "10,000 to 19,999 " 3 "20,000 to 29,999" 4 "30,000 to 39,999" ///
 5 "40,000 to 49,999" 6 "50,000 to 59,999" 7 "60,000 to 74,999" 8 "75,000 to 99,999" 9 "100,000 to 124,999" ///
 10 "125,000 to 199,999" 11 "200,000 and over"
label values incomecat incomelabels
label var incomecat "HH income category"

*race/ethnicity
drop mexicans*
gen caucasian = (ethnicity==1) if ethnicity!=.
label var caucasian "Caucasian"
gen africanamer = (ethnicity==2) if ethnicity!=.
label var africanamer "African American"
* latino used to be 0 whenever hispaniclatino was missing, which silently classified
* respondents with no answer as non-Latino. It is now missing in that case, and the two
* indicators that depend on it are missing as well.
gen latino = (hispaniclatino==1) if !missing(hispaniclatino)
label var latino "Hispanic/Latino"
gen nonhispwhite = (caucasian==1 & latino==0) if !missing(ethnicity, latino)
label var nonhispwhite "Non-Hispanic Caucasian"
gen otherethnic = (ethnicity!=1 & ethnicity!=2 & latino==0) if !missing(ethnicity, latino)
label var otherethnic "Other race or ethnic group"



* state + census region
* statecode is the last word of the decoded state label, with DC and Puerto Rico tidied
* the same way as birthstate so that the two codes are comparable.
decode statereside, gen(statename)
egen statecode = ends(statename), punct(" ") last
replace statecode = "DC" if regexm(statecode,"D.C")
replace statecode = "PR" if statecode=="RICO"
* Region is assigned only when the state of residence is known; previously a missing state
* defaulted to South. South remains the residual for every recorded code not listed below.
* NOTE(review): the division labels below name nine Northeast states, but only eight codes
* are listed for the Northeast, so one of those states falls into South. Confirm against the codebook.
gen censusregion = "South" if !missing(statereside)
replace censusregion = "Northeast" if inlist(statereside, 7, 19, 21, 29, 39, 30, 32, 38)
replace censusregion = "Midwest" if inlist(statereside, 13, 14, 22, 35, 49, 15, 16, 23, 25, 27, 34, 41)
replace censusregion = "West" if inlist(statereside, 3, 6, 12, 26, 28, 31, 44, 5, 37, 47)
label var censusregion "Census region"
gen thenortheast = (censusregion=="Northeast") if censusregion!=""
label var thenortheast "Lives in northeast (Census region I)"
gen themidwest = (censusregion=="Midwest") if censusregion!=""
label var themidwest "Lives in midwest (Census region II)"
gen thesouth = (censusregion=="South") if censusregion!=""
label var thesouth "Lives in south (Census region III)"
gen thewest = (censusregion=="West") if censusregion!=""
label var thewest "Lives in west (Census region IV)"
* Divisions: each region starts as its residual division, then the listed states are moved out.
gen censusdiv = censusregion
label var censusdiv "Census division"
replace censusdiv = "New England" if censusdiv=="Northeast"
replace censusdiv = "Mid Atlantic" if censusdiv=="New England" & inlist(statereside, 30, 32, 38)
replace censusdiv = "West North Central" if censusdiv=="Midwest"
replace censusdiv = "East North Central" if censusdiv=="West North Central" & inlist(statereside, 13, 14, 22, 35, 49)
replace censusdiv = "South Atlantic" if censusdiv=="South"
replace censusdiv = "East South Central" if censusdiv=="South Atlantic" & inlist(statereside, 2, 17, 24, 42)
replace censusdiv = "West South Central" if censusdiv=="South Atlantic" & inlist(statereside, 4, 18, 36, 43)
replace censusdiv = "Mountain" if censusdiv=="West"
replace censusdiv = "Pacific" if censusdiv=="Mountain" & inlist(statereside, 5, 37, 47)
tab censusdiv
* Division indicators are built by name rather than from the alphabetical position of the
* tabulate dummies, so an empty division can no longer shift or break the mapping.
gen byte midwest = (censusdiv=="East North Central") if censusdiv!=""
label var midwest "Midwest (IL, IN, MI, OH, WI)"
gen byte deepsouth = (censusdiv=="East South Central") if censusdiv!=""
label var deepsouth "Southern region (AL, KY, MS, TN)"
gen byte midatlantic = (censusdiv=="Mid Atlantic") if censusdiv!=""
label var midatlantic "Mid Atlantic region (NJ, NY, PA)"
gen byte mountain = (censusdiv=="Mountain") if censusdiv!=""
label var mountain "Mountain region (AZ, CO, ID, MT, NM, NV, UT)"
gen byte newengland = (censusdiv=="New England") if censusdiv!=""
label var newengland "New England region (CT, NH, MA, ME, RI, VT)"
gen byte pacific = (censusdiv=="Pacific") if censusdiv!=""
label var pacific "Pacific region (CA, OR, WA)"
gen byte southatlantic = (censusdiv=="South Atlantic") if censusdiv!=""
label var southatlantic "South Atlantic region (DC, DE, FL, GA, MD, NC, SC, VA, WV)"
gen byte northplains = (censusdiv=="West North Central") if censusdiv!=""
label var northplains "Northern plains region (IA, KS, MN, MO, ND, NE, SD)"
gen byte southplains = (censusdiv=="West South Central") if censusdiv!=""
label var southplains "Southern plains region (AR, LA, OK, TX)"

* state of birth
* Use the last word of the decoded birth-state label as the code, tidy the DC and Puerto Rico
* cases, then flag respondents whose state of residence differs from their state of birth.
tempvar bornlabel
decode stateborn, gen(`bornlabel')
egen birthstate = ends(`bornlabel'), punct(" ") last
drop `bornlabel'
replace birthstate = "DC" if regexm(birthstate,"D.C")
replace birthstate = "PR" if birthstate=="RICO"
* Left missing unless both codes are known.
gen migrated = (statecode!=birthstate) if !missing(statecode, birthstate)
label var migrated "Does not reside in state of birth"
 
* work status
* Single work status per respondent. Assigned from lowest to highest priority so that later
* lines overwrite earlier ones: Employed wins over Unemployed, then Retired, Disabled,
* Homemaker and finally Other.
gen workstatus = ""
replace workstatus = "Other" if currentjobstatuss3==3 | currentjobstatuss7==7
replace workstatus = "Homemaker" if currentjobstatuss6==6
replace workstatus = "Disabled" if currentjobstatuss4==4
replace workstatus = "Retired" if currentjobstatuss5==5
replace workstatus = "Unemployed" if currentjobstatuss2==2
replace workstatus = "Employed" if currentjobstatuss1==1
label var workstatus "Current employment status"
* One 0/1 indicator per status; the variable name is the lower-case status and the label is the status.
* NOTE(review): respondents with no recorded status get 0 on every indicator and therefore
* 1 on outoflaborforce below.
foreach status in Employed Unemployed Retired Homemaker Disabled {
local flag = lower("`status'")
gen `flag' = (workstatus=="`status'")
label var `flag' "`status'"
}
gen otherwork = (workstatus=="Other")
label var otherwork "Other work status"
* Blue collar: typework 12-22 except 16 and 17; typework 0 is treated as unknown.
recode typework (1/11 = 0) (12/22=1), gen(bluecollar)
replace bluecollar = 0 if inlist(typework, 16, 17)
replace bluecollar = . if typework==0
label var bluecollar "Works in blue collar job"
gen outoflaborforce = !(employed | unemployed)



* marital status
* Three mutually exclusive indicators built from the living-situation item; all are left
* missing when that item is missing.
local living currentlivingsituation
gen married = (`living'==1) if `living'!=.
label var married "Married"
gen nevermarried = (`living'==5) if `living'!=.
label var nevermarried "Never married"
gen othermarital = (married + nevermarried == 0) if `living'!=.
label var othermarital "Other marital status"



* Suffix every wave-2 variable with _2, restore the merge key to its plain name, and save.
sort prim_key
rename * *_2
rename prim_key_2 prim_key

save "$dir/data/outputs/ALP2demographics", replace




**** clean ALP1 demographics and rename those variables with 1 *******
use "$dir/data/raw-data/ALP_MS90002_2013_10_22_08_47_24", clear

drop *householdmember_age* *householdmember_gender* v* *_householdmember_relation ms90002_ethnicity_asianpacificis

* Strip the module prefix from the variables used below.
local stubs language tsend gender birthmonth birthday birthyear ///
 calcage statereside borninus stateborn citizenus currentlivingsituation ethnicity ///
 highesteducation hispaniclatino mexicans1 mexicans2 mexicans3 mexicans4 hispaniclatino_detail ///
 currentjobstatuss1 currentjobstatuss2 currentjobstatuss3 currentjobstatuss4 currentjobstatuss5 currentjobstatuss6 ///
 currentjobstatuss7 doyouwork typework householdmembers webtv familyincome familyincome_part2 recruitment_type ///
 first_contact contact_type
foreach stub of local stubs {
 rename ms90002_`stub' `stub'
}

rename ms90002_prim_key prim_key
order _all, alphabetic
order prim_key, first
drop if prim_key==""
sort prim_key

* Keep only subjects in the ALP1 experiment file; those with no demographics row are flagged.
merge m:1 prim_key using "$dir/data/raw-data/ALP1_primkeys.dta", keepusing(complete)
keep if inlist(_merge, 2, 3)
gen missing_demographics = 1 if _merge==2
*keep if complete == 1 
drop _merge

* now add flag for those that completed in 2016 
* Subjects found only in the ALP2 file are not added.
merge m:1 prim_key using "$dir/data/raw-data/ALP2_primkeys.dta", keepusing(complete2)
keep if inlist(_merge, 1, 3)
drop _merge


* gender
gen female = (gender==2) if gender!=.
label var female "Female"

* age 
* NOTE(review): 113 - birthyear presumably assumes birthyear is stored as years since 1900
* and the survey year is 2013. Confirm against the codebook.
gen age = 113 - birthyear 
replace age = . if age==17
* Stata orders missing above every number, so an unguarded (age>=65) is 1 when age is
* unknown. Leave both age-group indicators missing in that case.
gen age18to44 = (age<=44) if !missing(age)
gen age65plus = (age>=65) if !missing(age)

* education
* Collapse the raw attainment codes into five levels.
* Raw codes 1 and 2 were previously not listed, so recode left them unchanged and a raw 2
* ended up labelled Completed HS. They are now folded into level 1.
* NOTE(review): this assumes raw codes 1-2 are attainment levels below code 3. Confirm against the codebook.
recode highesteducation (1/8=1) (9=2) (10/12=3) (13=4) (14/16=5), gen(education)
label define edulabels 1 "Less than HS" 2 "Completed HS" 3 "Some college" 4 "Undergraduate" 5 "Graduate"
label values education edulabels
label var education "Education"
* Indicators are left missing when education is missing.
gen lessthanHS = (education==1) if education!=.
label var lessthanHS "Did not complete high school"
gen highschool = (education>=2) if education!=.
label var highschool "Completed high school"
gen somecollege = (education==3) if education!=.
label var somecollege "Some college"
gen college = (education>=4) if education!=.
label var college "Completed college"

* HH income
* approxincome is a dollar value assigned to each familyincome bracket (1-13), in code order.
gen approxincome = .
local k = 0
foreach midpoint in 2500 6250 8750 11250 13750 17500 22500 27500 32500 37500 45000 55000 67500 {
local ++k
replace approxincome = `midpoint' if familyincome==`k'
}
* Bracket 14 is split further by the follow-up question familyincome_part2.
replace approxincome = 87500 if familyincome==14 & familyincome_part2==1
replace approxincome = 112500 if familyincome==14 & familyincome_part2==2
replace approxincome = 162500 if familyincome==14 & familyincome_part2==3
replace approxincome = 206250 if familyincome==14 & familyincome_part2==4
replace approxincome = approxincome/1000
label var approxincome "Approximate HH (in thousands)"
recode familyincome (1/3 = 1) (4/6=2) (7/8=3) (9/10=4) (11=5) (12=6) (13=7) (14=8), gen(incomecat)
replace incomecat = 9 if incomecat==8 & familyincome_part2==2
replace incomecat = 10 if incomecat==8 & familyincome_part2==3
replace incomecat = 11 if incomecat==8 & familyincome_part2==4
* When the follow-up answer is not one of 1-4, approxincome is missing. Previously incomecat
* still defaulted to category 8 for those respondents; leave it missing as well so the two
* income measures agree.
replace incomecat = . if familyincome==14 & !inlist(familyincome_part2, 1, 2, 3, 4)
label define incomelabels 1 "Less than 10,000" 2 "10,000 to 19,999 " 3 "20,000 to 29,999" 4 "30,000 to 39,999" ///
 5 "40,000 to 49,999" 6 "50,000 to 59,999" 7 "60,000 to 74,999" 8 "75,000 to 99,999" 9 "100,000 to 124,999" ///
 10 "125,000 to 199,999" 11 "200,000 and over"
label values incomecat incomelabels
label var incomecat "HH income category"

*race/ethnicity
drop mexicans*
gen caucasian = (ethnicity==1) if ethnicity!=.
label var caucasian "Caucasian"
gen africanamer = (ethnicity==2) if ethnicity!=.
label var africanamer "African American"
* latino used to be 0 whenever hispaniclatino was missing, which silently classified
* respondents with no answer as non-Latino. It is now missing in that case, and the two
* indicators that depend on it are missing as well.
gen latino = (hispaniclatino==1) if !missing(hispaniclatino)
label var latino "Hispanic/Latino"
gen nonhispwhite = (caucasian==1 & latino==0) if !missing(ethnicity, latino)
label var nonhispwhite "Non-Hispanic Caucasian"
gen otherethnic = (ethnicity!=1 & ethnicity!=2 & latino==0) if !missing(ethnicity, latino)
label var otherethnic "Other race or ethnic group"


* state + census region
gen statecode = statereside
* Region is assigned only when the state of residence is known; previously a missing state
* defaulted to South. South remains the residual for every recorded code not listed below.
* NOTE(review): the division labels below name nine Northeast states, but only eight codes
* are listed for the Northeast, so one of those states falls into South. Confirm against the codebook.
gen censusregion = "South" if !missing(statereside)
replace censusregion = "Northeast" if inlist(statereside, 7, 19, 21, 29, 39, 30, 32, 38)
replace censusregion = "Midwest" if inlist(statereside, 13, 14, 22, 35, 49, 15, 16, 23, 25, 27, 34, 41)
replace censusregion = "West" if inlist(statereside, 3, 6, 12, 26, 28, 31, 44, 5, 37, 47)
label var censusregion "Census region"
gen thenortheast = (censusregion=="Northeast") if censusregion!=""
label var thenortheast "Lives in northeast (Census region I)"
gen themidwest = (censusregion=="Midwest") if censusregion!=""
label var themidwest "Lives in midwest (Census region II)"
gen thesouth = (censusregion=="South") if censusregion!=""
label var thesouth "Lives in south (Census region III)"
gen thewest = (censusregion=="West") if censusregion!=""
label var thewest "Lives in west (Census region IV)"
* Divisions: each region starts as its residual division, then the listed states are moved out.
gen censusdiv = censusregion
label var censusdiv "Census division"
replace censusdiv = "New England" if censusdiv=="Northeast"
replace censusdiv = "Mid Atlantic" if censusdiv=="New England" & inlist(statereside, 30, 32, 38)
replace censusdiv = "West North Central" if censusdiv=="Midwest"
replace censusdiv = "East North Central" if censusdiv=="West North Central" & inlist(statereside, 13, 14, 22, 35, 49)
replace censusdiv = "South Atlantic" if censusdiv=="South"
replace censusdiv = "East South Central" if censusdiv=="South Atlantic" & inlist(statereside, 2, 17, 24, 42)
replace censusdiv = "West South Central" if censusdiv=="South Atlantic" & inlist(statereside, 4, 18, 36, 43)
replace censusdiv = "Mountain" if censusdiv=="West"
replace censusdiv = "Pacific" if censusdiv=="Mountain" & inlist(statereside, 5, 37, 47)
tab censusdiv
* Division indicators are built by name rather than from the alphabetical position of the
* tabulate dummies, so an empty division can no longer shift or break the mapping.
gen byte midwest = (censusdiv=="East North Central") if censusdiv!=""
label var midwest "Midwest (IL, IN, MI, OH, WI)"
gen byte deepsouth = (censusdiv=="East South Central") if censusdiv!=""
label var deepsouth "Southern region (AL, KY, MS, TN)"
gen byte midatlantic = (censusdiv=="Mid Atlantic") if censusdiv!=""
label var midatlantic "Mid Atlantic region (NJ, NY, PA)"
gen byte mountain = (censusdiv=="Mountain") if censusdiv!=""
label var mountain "Mountain region (AZ, CO, ID, MT, NM, NV, UT)"
gen byte newengland = (censusdiv=="New England") if censusdiv!=""
label var newengland "New England region (CT, NH, MA, ME, RI, VT)"
gen byte pacific = (censusdiv=="Pacific") if censusdiv!=""
label var pacific "Pacific region (CA, OR, WA)"
gen byte southatlantic = (censusdiv=="South Atlantic") if censusdiv!=""
label var southatlantic "South Atlantic region (DC, DE, FL, GA, MD, NC, SC, VA, WV)"
gen byte northplains = (censusdiv=="West North Central") if censusdiv!=""
label var northplains "Northern plains region (IA, KS, MN, MO, ND, NE, SD)"
gen byte southplains = (censusdiv=="West South Central") if censusdiv!=""
label var southplains "Southern plains region (AR, LA, OK, TX)"

* state of birth
* Copy the birth-state code and flag respondents whose state of residence differs from it.
* The flag is left missing unless both codes are known.
generate birthstate = stateborn
generate migrated = (statecode!=birthstate) if !missing(statecode, birthstate)
label variable migrated "Does not reside in state of birth"
 
* work status
* Single work status per respondent. Assigned from lowest to highest priority so that later
* lines overwrite earlier ones: Employed wins over Unemployed, then Retired, Disabled,
* Homemaker and finally Other.
gen workstatus = ""
replace workstatus = "Other" if currentjobstatuss3==3 | currentjobstatuss7==7
replace workstatus = "Homemaker" if currentjobstatuss6==6
replace workstatus = "Disabled" if currentjobstatuss4==4
replace workstatus = "Retired" if currentjobstatuss5==5
replace workstatus = "Unemployed" if currentjobstatuss2==2
replace workstatus = "Employed" if currentjobstatuss1==1
label var workstatus "Current employment status"
* Labor-force indicators come straight from the raw items here: unemployed also counts
* currentjobstatuss3==3, and applies only to respondents who are not employed.
* NOTE(review): respondents with no recorded status get outoflaborforce = 1.
gen employed = (currentjobstatuss1==1)
gen unemployed = (employed==0) & (currentjobstatuss2==2 | currentjobstatuss3==3)
gen outoflaborforce = !(employed | unemployed)
* One 0/1 indicator per remaining status; the variable name is the lower-case status and the label is the status.
foreach status in Retired Homemaker Disabled {
local flag = lower("`status'")
gen `flag' = (workstatus=="`status'")
label var `flag' "`status'"
}
gen otherwork = (workstatus=="Other")
label var otherwork "Other work status"
* Blue collar: typework 12-22 except 16 and 17; typework 0 is treated as unknown.
recode typework (1/11 = 0) (12/22=1), gen(bluecollar)
replace bluecollar = 0 if inlist(typework, 16, 17)
replace bluecollar = . if typework==0
label var bluecollar "Works in blue collar job"


* marital status
* Three mutually exclusive indicators built from the living-situation item; all are left
* missing when that item is missing.
local living currentlivingsituation
gen married = (`living'==1) if `living'!=.
label var married "Married"
gen nevermarried = (`living'==5) if `living'!=.
label var nevermarried "Never married"
gen othermarital = (married + nevermarried == 0) if `living'!=.
label var othermarital "Other marital status"

** merge in politics variables from other ALP modules
merge m:1 prim_key using "$dir/data/raw-data/alp_polit_char_short.dta"
drop if _m==2 // ALP subjects who are not in our data set 
// votedobama is labelled below but never created in this file, so the label command fails
// unless the merged file already supplies it. Keep the merged variable when it exists;
// otherwise build it the same way as the other 2012 vote indicators.
capture confirm variable votedobama, exact
if _rc {
gen votedobama = (vote_2012==1) if vote_2012!=. // assumes vote_2012==1 is an Obama vote -- TODO confirm
}
gen votedromney = (vote_2012==2) if vote_2012!=.
gen votedother = (vote_2012==3) if vote_2012!=.
gen novotedata = (vote_2012==.)
label var votedobama "Voted for Obama in 2012"
label var votedromney "Voted for Romney in 2012"
label var votedother "Didn't vote for Obama or Romney in 2012"
label var novotedata "No voting data for 2012"
drop _m


* Suffix every wave-1 variable with _1, restore the merge key to its plain name, and save.
sort prim_key
rename * *_1
rename prim_key_1 prim_key
save "$dir/data/outputs/ALP1demographics", replace
}

** merge all datasets together***
// Start from the wave-1 subject-level file and add, in order: wave-1 demographics,
// wave-1 CES and GARP results, the wave-2 subject file with its parameters, wave-2
// demographics, and the political survey variables.
clear
use "$dir/data/outputs/ALP1withoutparams"
*keep if complete1==1
merge 1:1 prim_key using "$dir/data/outputs/ALP1demographics"
keep if _m ==3
drop _m


merge 1:1 subjectid1 using "$dir/data/outputs/CES1"
drop _m
merge 1:1 subjectid1 using "$dir/data/outputs/GARP1"
drop _m
merge 1:1 prim_key using "$dir/data/outputs/ALP2withparams"
drop _m

merge 1:1 prim_key using "$dir/data/outputs/ALP2demographics"
drop if _m == 2
drop _m 
// Sanders_preprimary1 is created together with the other pre-primary indicators but was
// missing from this list, so it never reached the final file. Every other name is unchanged.
merge 1:1 prim_key using "$dir/data/outputs/ALP2_politicaldata", keepusing(willvote_rep_prim  willvote_dem_prim Trump_preprimary1 Trump_preprimary2 ///
Clinton_preprimary1 Clinton_preprimary2 Sanders_preprimary1 Sanders_preprimary2 Trump_postprimary Clinton_postprimary Sanders_postprimary Rep_postprimary Dem_postprimary ///
Trump_preelection1 Trump_preelection2 Clinton_preelection1 Clinton_preelection2 votedelection votedTrump votedClinton vchance*)
drop _m
* keep only people who completed the first experiment 
// NOTE(review): this keeps every row where complete1 is non-missing, including complete1==0.
keep if complete1 !=.

saveold "$dir/data/outputs/ALPalldatafinal", replace


